1 Introduction

Multiples factors play an important function during the naive CD4-T cell differentiation where Blimp1 (encoded by Prdm1) and Bcl6 are the masters. Taking advantatge of the Cicero’s prediction, we are going to characterize the accessibility pattern not only at th level of specific gene, but also at the level of the co-accessibility regions to which gene belongs.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(Signac)
library(GenomicRanges)
library(pheatmap)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(data.table)
library(ggpubr)
library(plyr)
library(stringr)

2.2 Parameters

color_palette <-  c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", 
                    "#5A5156", "#AA0DFE", "#F8A19F", "#F7E1A0",
                    "#1C8356", "#FEAF16", "#822E1C", "#C4451C", 
                    "#1CBE4F", "#325A9B", "#F6222E", "#FE00FA",
                    "#FBE426", "#16FF32",  "black",  "#3283FE",
                    "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3", 
                    "#90AD1C", "#FE00FA", "#85660D", "#3B00FB", 
                    "#822E1C", "coral2",  "#1CFFCE", "#1CBE4F", 
                    "#3283FE", "#FBE426", "#F7E1A0", "#325A9B", 
                    "#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD",
                    "#FEAF16", "#683B79", "#B10DA1", "#1C7F93", 
                    "#F8A19F", "dark orange", "#FEAF16", 
                    "#FBE426", "Brown")

cell_type <- "CD4_T"

path_to_obj <- paste0(
  here::here("scATAC-seq/results/R_objects/level_5/"),
  cell_type,
  "/04.",
  cell_type,
  "_integration_peak_calling_level_5.rds",
  sep = ""
)

path_to_cicero <- paste0(
  here::here("scATAC-seq/results/Cicero/"),
  cell_type,"/",
  sep = ""
)

path_to_obj_general <- here::here("scATAC-seq/results/R_objects/8.4.tonsil_peakcalling_annotation_level1_chromVar.rds")

upstream <- 2000

2.3 Functions

plot_dim <- function(seurat, group){
  DimPlot(seurat, 
  group.by = group,
  cols = color_palette,
  pt.size = 0.1,raster=FALSE)
}

mat_heatmap <- function(seurat, features, group,
                        cutree_ncols,cutree_nrows){
  avgexpr_mat <- AverageExpression(
  features = features,
  seurat,
  assays = "peaks_level_5",
  return.seurat = F,
  group.by = group,
  slot = "data")
  
  p1 <- pheatmap(avgexpr_mat$peaks_level_5, scale = "row",
                angle_col = 45,
                show_rownames=T,
               border_color = "white",
               cluster_rows = T,
               cluster_cols = T,
               fontsize_col = 10,
               clustering_distance_rows = "euclidean",
               clustering_distance_cols = "euclidean", 
               clustering_method = "ward.D2",
               cutree_rows = cutree_nrows, 
               cutree_cols = cutree_ncols)
  return(p1)}

3 Load data

3.1 Gene reference data

gene_reference <-  here::here("scATAC-seq/Cicero/genes.gtf")
gtf <- rtracklayer::import(gene_reference)
gtf_df <- as.data.frame(gtf)

gtf_df_gene <- gtf_df[gtf_df$type == "gene",]
DT::datatable(gtf_df_gene)
# We are going to add 2000 bp upstream of each gene to take in acccount the proximal promoter regions
gtf_df_gene$start <- gtf_df_gene$start - upstream

3.2 Entire tonsil data

tonsil <- readRDS(path_to_obj_general)
tonsil
## An object of class Seurat 
## 271530 features across 101075 samples within 2 assays 
## Active assay: peaks_macs (270784 features, 270784 variable features)
##  1 other assay present: chromvar
##  3 dimensional reductions calculated: umap, lsi, harmony
tonsil_peaks <- tonsil@assays$peaks_macs@ranges

plot_dim(tonsil, group = "annotation_level_1") 

3.3 CD4-T cells data

seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat 
## 93116 features across 16383 samples within 1 assay 
## Active assay: peaks_level_5 (93116 features, 92407 variable features)
##  3 dimensional reductions calculated: umap, lsi, harmony
seurat_peaks <- seurat@assays$peaks_level_5@ranges

plot_dim(seurat, group = "annotation_paper") 

3.3.1 Grouping the cells in Non-Tfh & Tfh groups

At low level of resolution, we want to detect the main epigenomic changes between Non-Tfh vs Tfh. For this reason, we decide to group the cells in 4 clusters: Non-Tfh, Tfh, Central Memory and Naive.

seurat@meta.data <- seurat@meta.data %>% mutate(Group =
  case_when(annotation_paper == "Naive" ~ "Naive",
    annotation_paper == "CM Pre-non-Tfh" ~ "Central Memory",
    annotation_paper == "CM PreTfh" ~ "Central Memory",
    annotation_paper == "T-Trans-Mem" ~ "Non-Tfh",
    annotation_paper == "T-Eff-Mem" ~ "Non-Tfh",
    annotation_paper == "T-helper" ~ "Non-Tfh",
    annotation_paper == "Tfh T:B border" ~ "Tfh",
    annotation_paper == "Tfh-LZ-GC" ~ "Tfh",
    annotation_paper == "GC-Tfh-SAP" ~ "Tfh",
    annotation_paper == "GC-Tfh-0X40" ~ "Tfh",
    annotation_paper == "Tfh-Mem" ~ "Tfh",
    annotation_paper == "Memory T cells" ~ "Non-Tfh",
    annotation_paper == "Eff-Tregs" ~ "Non-Tfh",
    annotation_paper == "non-GC-Tf-regs" ~ "Non-Tfh",
    annotation_paper == "GC-Tf-regs" ~ "Non-Tfh"))

plot_dim(seurat, group = "Group") 

3.4 Master regulators: BCL6 and PRDM1

bcl6 <- gtf_df_gene[which(gtf_df_gene$gene_name %in% "BCL6"),]
bcl6_gr <- makeGRangesFromDataFrame(bcl6)

prdm1 <- gtf_df_gene[which(gtf_df_gene$gene_name %in% "PRDM1"),]
prdm1_gr <- makeGRangesFromDataFrame(prdm1)

4 Cicero

conns <- read.table(paste0(path_to_cicero,
                           "level_5_conns.tsv"), header = T)
ccans <- read.table(paste0(path_to_cicero,
                           "level_5_ccans.tsv"), header = T)

links_T <- ConnectionsToLinks(conns = conns, 
                              ccans = ccans, threshold = 0)
Links(seurat) <- links_T

4.1 BCL6

# Overlapping bcl6 with seurat_peaks
overlapping_bcl6_peaks <- seurat_peaks[queryHits(findOverlaps(seurat_peaks, 
bcl6_gr)),]

bcl6_seurat_peaks_features <- paste0(
  seqnames(overlapping_bcl6_peaks),"-",
  start(overlapping_bcl6_peaks),"-",
  end(overlapping_bcl6_peaks))


# Overlapping bcl6 with links
overlapping <- links_T[queryHits(findOverlaps(links_T, bcl6_gr)),]
overlapping_peaks <- seurat_peaks[queryHits(findOverlaps(seurat_peaks, 
overlapping)),]

features <- paste0(seqnames(overlapping_peaks),"-",
                   start(overlapping_peaks),"-",
                   end(overlapping_peaks))

region_bcl6_links <-  paste0(unique(seqnames(overlapping_peaks)),"-",
                       min(start(overlapping_peaks)),"-",
                       max(end(overlapping_peaks)))

#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/bcl6_coverage_plot.pdf"), 
 #   width = 10, 
  #  height = 6)

region.highlight_enhancer <- GRanges(seqnames = "chr3",
                             ranges = IRanges(start = 187910000, 
                                              end = 188001000))
  
CoveragePlot(object = seurat, 
             region.highlight = region.highlight_enhancer,
             group = "annotation_paper", 
             region = region_bcl6_links)

mat_heatmap(seurat = seurat, 
            features = features, 
            group = "Group",
            cutree_ncols = 2,
            cutree_nrows = 1)

mat_heatmap(seurat = seurat, 
            features = features, 
            group = "annotation_paper",
            cutree_ncols = 2,cutree_nrows = 1)

4.1.1 Potential enhancer of BCL6 at CD4 T cells level

enhancer_region <- overlapping_peaks[start(overlapping_peaks) >= 187900000,]

enhancer_features <- unique(paste0("chr3", "-",
                          start(ranges(enhancer_region)), "-",
                          end(ranges(enhancer_region))))

region_plot <- paste0("chr3", "-",
                      min(start(ranges(enhancer_region))),"-",
                      max(end(ranges(enhancer_region))))


CoveragePlot(object = seurat, 
             region.highlight = region.highlight_enhancer,
             group = "annotation_paper", 
             region = region_plot)

4.1.1.1 AddChromatin module

Finally, we can compute a combinatory score taking into account the peaks located at BCL6 gene level and the peaks located on the super enhancer region.

seurat = AddModuleScore(
  object = seurat,
  features = list(bcl6_seurat_peaks_features),
  name = 'BCL6_gene')

seurat = AddModuleScore(
  object = seurat,
  features = list(enhancer_features),
  name = 'BCL6_enhancer')

bcl6_umap <- FeaturePlot(
  seurat, min.cutoff = "q5", 
  max.cutoff = "q95",
  raster = F,
  features = "BCL6_gene1",
  pt.size = 0.3)

bcl6_enhancer_umap <- FeaturePlot(seurat, 
  min.cutoff = "q5", 
  max.cutoff = "q95",
  raster = F,
  features = "BCL6_enhancer1",
  pt.size = 0.3)


#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/bcl6_umap_enhancer.pdf"), 
#    width = 10, 
#    height = 6)

join = bcl6_umap |  bcl6_enhancer_umap
print(join)

#dev.off()

4.1.2 Potential enhancer of BCL6 at tonsil level

overlapping_bcl6_gw <- tonsil_peaks[queryHits(findOverlaps(tonsil_peaks, bcl6_gr)),]

bcl6_peaks_gw <- paste0("chr3", "-",
start(ranges(overlapping_bcl6_gw)),  "-",
end(ranges(overlapping_bcl6_gw)))

enhancer_region_gw <- tonsil_peaks[seqnames(tonsil_peaks) == "chr3" & 
start(tonsil_peaks) >= 187910000 & 
end(tonsil_peaks) <= 188001000,]

enhancer_features_gw  <- unique(paste0("chr3", "-",
start(ranges(enhancer_region_gw)), "-",
end(ranges(enhancer_region_gw))))

4.1.2.1 AddChromatin module

Finally, we can compute a combinatory score taking into account the peaks located at BCL6 gene level and the peaks located on the super enhancer region.

tonsil = AddModuleScore(
object = tonsil,
features = list(bcl6_peaks_gw),
name = 'BCL6_gene'
)

tonsil = AddModuleScore(
object = tonsil,
features = list(enhancer_features_gw),
name = 'BCL6_enhancer'
)

bcl6_umap <- FeaturePlot(
tonsil, 
min.cutoff = "q5", 
max.cutoff = "q95",
raster = F,
features = "BCL6_gene1",
pt.size = 0.3)

bcl6_enhancer_umap <- FeaturePlot(
tonsil, 
min.cutoff = "q5", 
max.cutoff = "q95",
raster = F,
features = "BCL6_enhancer1",
pt.size = 0.3)


#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/entire_tonsil_umap_enhancer.pdf"), 
 #   width = 10, 
  #  height = 6)

join = bcl6_umap |  bcl6_enhancer_umap
print(join)

4.2 PRDM1

overlapping <- links_T[queryHits(findOverlaps(links_T, prdm1_gr)),]
overlapping_peaks <- seurat_peaks[queryHits(findOverlaps(seurat_peaks, 
                                                         overlapping)),]

features <- paste0(seqnames(overlapping_peaks),"-",
                   start(overlapping_peaks),"-",
                   end(overlapping_peaks))

region_plot <-  paste0(unique(seqnames(overlapping_peaks)),"-",
                       min(start(overlapping_peaks)),"-",
                       max(end(overlapping_peaks)))

#pdf(file = here::here("scATAC-seq/results/plots/CD4-T/prdm1_coverage_plot.pdf"), 
  #  width = 10, 
 #   height = 6)

CoveragePlot(object = seurat, group = "annotation_paper", region = region_plot)

mat_heatmap(seurat = seurat, 
            features = features, 
            group = "Group",
            cutree_ncols = 2,
            cutree_nrows = 1)

mat_heatmap(seurat = seurat, 
            features = features, 
            group = "annotation_paper",
            cutree_ncols = 2,cutree_nrows = 1)

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Motif_TF/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.6           ggpubr_0.4.0         data.table_1.14.0    forcats_0.5.0        stringr_1.4.0        purrr_0.3.4          readr_1.4.0          tidyr_1.1.3          tibble_3.1.2         tidyverse_1.3.0      ggplot2_3.3.5        dplyr_1.0.7          pheatmap_1.0.12      GenomicRanges_1.40.0 GenomeInfoDb_1.24.2  IRanges_2.22.1       S4Vectors_0.26.0     BiocGenerics_0.34.0  Signac_1.2.1         SeuratObject_4.0.2   Seurat_4.0.3         BiocStyle_2.16.1    
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1                  reticulate_1.20             tidyselect_1.1.1            htmlwidgets_1.5.3           grid_4.0.3                  docopt_0.7.1                BiocParallel_1.22.0         Rtsne_0.15                  munsell_0.5.0               codetools_0.2-17            ica_1.0-2                   DT_0.16                     future_1.21.0               miniUI_0.1.1.1              withr_2.4.2                 colorspace_2.0-2            Biobase_2.48.0              knitr_1.30                  rstudioapi_0.11             ROCR_1.0-11                 ggsignif_0.6.0              tensor_1.5                  listenv_0.8.0               labeling_0.4.2              slam_0.1-47                 GenomeInfoDbData_1.2.3      polyclip_1.10-0             farver_2.1.0                rprojroot_2.0.2             parallelly_1.26.1           vctrs_0.3.8                 generics_0.1.0              xfun_0.18                   lsa_0.73.2                  ggseqlogo_0.1               R6_2.5.0                    DelayedArray_0.14.0         bitops_1.0-7                spatstat.utils_2.2-0        assertthat_0.2.1            promises_1.2.0.1            scales_1.1.1               
##  [43] gtable_0.3.0                globals_0.14.0              goftest_1.2-2               rlang_0.4.11                RcppRoll_0.3.0              splines_4.0.3               rtracklayer_1.48.0          rstatix_0.6.0               lazyeval_0.2.2              spatstat.geom_2.2-0         broom_0.7.2                 BiocManager_1.30.10         yaml_2.2.1                  reshape2_1.4.4              abind_1.4-5                 modelr_0.1.8                crosstalk_1.1.1             backports_1.1.10            httpuv_1.6.1                tools_4.0.3                 bookdown_0.21               ellipsis_0.3.2              spatstat.core_2.2-0         RColorBrewer_1.1-2          ggridges_0.5.3              Rcpp_1.0.6                  zlibbioc_1.34.0             RCurl_1.98-1.2              rpart_4.1-15                deldir_0.2-10               pbapply_1.4-3               cowplot_1.1.1               zoo_1.8-9                   SummarizedExperiment_1.18.1 haven_2.3.1                 ggrepel_0.9.1               cluster_2.1.0               fs_1.5.0                    here_1.0.1                  magrittr_2.0.1              scattermore_0.7             openxlsx_4.2.4             
##  [85] lmtest_0.9-38               reprex_0.3.0                RANN_2.6.1                  SnowballC_0.7.0             fitdistrplus_1.1-5          matrixStats_0.59.0          hms_0.5.3                   patchwork_1.1.1             mime_0.11                   evaluate_0.14               xtable_1.8-4                XML_3.99-0.3                rio_0.5.16                  sparsesvd_0.2               readxl_1.3.1                gridExtra_2.3               compiler_4.0.3              KernSmooth_2.23-17          crayon_1.4.1                htmltools_0.5.1.1           mgcv_1.8-33                 later_1.2.0                 lubridate_1.7.9             DBI_1.1.0                   tweenr_1.0.1                dbplyr_1.4.4                MASS_7.3-53                 Matrix_1.3-4                car_3.0-10                  cli_3.0.0                   igraph_1.2.6                pkgconfig_2.0.3             GenomicAlignments_1.24.0    foreign_0.8-80              plotly_4.9.4.1              spatstat.sparse_2.0-0       xml2_1.3.2                  XVector_0.28.0              rvest_0.3.6                 digest_0.6.27               sctransform_0.3.2           RcppAnnoy_0.0.18           
## [127] spatstat.data_2.1-0         Biostrings_2.56.0           rmarkdown_2.5               cellranger_1.1.0            leiden_0.3.8                fastmatch_1.1-0             uwot_0.1.10                 curl_4.3.2                  shiny_1.6.0                 Rsamtools_2.4.0             lifecycle_1.0.0             nlme_3.1-150                jsonlite_1.7.2              carData_3.0-4               viridisLite_0.4.0           fansi_0.5.0                 pillar_1.6.1                lattice_0.20-41             fastmap_1.1.0               httr_1.4.2                  survival_3.2-7              glue_1.4.2                  qlcMatrix_0.9.7             zip_2.1.1                   png_0.1-7                   ggforce_0.3.2               stringi_1.6.2               blob_1.2.1                  irlba_2.3.3                 future.apply_1.7.0